ggplotを使うときの注意点,複数の図に分割する方法,データを計算・要約する方法などを扱う.
図がエラーなく表示されても,記述したコードが意図どおりの内容になっているかを確認する必要がある.
# Goal: draw the GDP per capita time series for each country.
# No group aesthetic is mapped in this first attempt.
p <- ggplot(gapminder, aes(x = year, y = gdpPercap))
p + geom_line()
# Mistakes happen; do not worry about them.
# Time series per country:
# the grouping has to be written explicitly in the code.
# The large outlier is Kuwait.
p <- ggplot(gapminder, aes(x = year, y = gdpPercap))
p + geom_line(aes(group = country))
# Per-country gdpPercap time series, colored by continent.
p <- ggplot(
  gapminder,
  aes(x = year, y = gdpPercap, color = continent)
)
p + geom_line(aes(group = country))
facet_関数群は対象の変数ごとに図を切り出して,パネル形式で出力するための関数群.
# Use facet_wrap() to fit the country time series, split by continent,
# into a single figure.
p <- ggplot(gapminder, aes(x = year, y = gdpPercap))
p +
  geom_line(aes(group = country)) +
  facet_wrap(~continent, ncol = 3)
# Black lines are hard to read, so the country lines are drawn in gray.
# Several other things are changed as well (smoother, log scale, labels).
p <- ggplot(gapminder, aes(x = year, y = gdpPercap))
p +
  geom_line(aes(group = country), color = "gray") +
  geom_smooth(method = "loess", se = FALSE, size = 1.1) +
  scale_y_log10(labels = scales::dollar) +
  facet_wrap(~continent, ncol = 3) +
  labs(
    title = "GDP per capita on Five Continents",
    x = "year",
    y = "log GDP per capita"
  )
## `geom_smooth()` using formula 'y ~ x'
# Goal: cross-classify the data by two categorical variables.
# A new dataset, gss_sm, is used for this.
# gss_sm: a questionnaire survey of US adults on topics of interest to social scientists.
# It is found in the socviz package.
glimpse(gss_sm) # check an overview of the variables
## Rows: 2,867
## Columns: 32
## $ year <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 20…
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ ballot <labelled> 1, 2, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 3, 2, 3, 3, …
## $ age <dbl> 47, 61, 72, 43, 55, 53, 50, 23, 45, 71, 33, 86, 32, 60, …
## $ childs <dbl> 3, 0, 2, 4, 2, 2, 2, 3, 3, 4, 5, 4, 3, 5, 7, 2, 6, 5, 0,…
## $ sibs <labelled> 2, 3, 3, 3, 2, 2, 2, 6, 5, 1, 4, 4, 3, 6, 0, 1, 3, …
## $ degree <fct> Bachelor, High School, Bachelor, High School, Graduate, …
## $ race <fct> White, White, White, White, White, White, White, Other, …
## $ sex <fct> Male, Male, Male, Female, Female, Female, Male, Female, …
## $ region <fct> New England, New England, New England, New England, New …
## $ income16 <fct> $170000 or over, $50000 to 59999, $75000 to $89999, $170…
## $ relig <fct> None, None, Catholic, Catholic, None, None, None, Cathol…
## $ marital <fct> Married, Never Married, Married, Married, Married, Marri…
## $ padeg <fct> Graduate, Lt High School, High School, NA, Bachelor, NA,…
## $ madeg <fct> High School, High School, Lt High School, High School, H…
## $ partyid <fct> "Independent", "Ind,near Dem", "Not Str Republican", "No…
## $ polviews <fct> Moderate, Liberal, Conservative, Moderate, Slightly Libe…
## $ happy <fct> Pretty Happy, Pretty Happy, Very Happy, Pretty Happy, Ve…
## $ partners <fct> NA, 1 Partner, 1 Partner, NA, 1 Partner, 1 Partner, NA, …
## $ grass <fct> NA, Legal, Not Legal, NA, Legal, Legal, NA, Not Legal, N…
## $ zodiac <fct> Aquarius, Scorpio, Pisces, Cancer, Scorpio, Scorpio, Cap…
## $ pres12 <labelled> 3, 1, 2, 2, 1, 1, NA, NA, NA, 2, NA, NA, 1, 1, 2, 1…
## $ wtssall <dbl> 0.9569935, 0.4784968, 0.9569935, 1.9139870, 1.4354903, 0…
## $ income_rc <fct> Gt $170000, Gt $50000, Gt $75000, Gt $170000, Gt $170000…
## $ agegrp <fct> Age 45-55, Age 55-65, Age 65+, Age 35-45, Age 45-55, Age…
## $ ageq <fct> Age 34-49, Age 49-62, Age 62+, Age 34-49, Age 49-62, Age…
## $ siblings <fct> 2, 3, 3, 3, 2, 2, 2, 6+, 5, 1, 4, 4, 3, 6+, 0, 1, 3, 6+,…
## $ kids <fct> 3, 0, 2, 4+, 2, 2, 2, 3, 3, 4+, 4+, 4+, 3, 4+, 4+, 2, 4+…
## $ religion <fct> None, None, Catholic, Catholic, None, None, None, Cathol…
## $ bigregion <fct> Northeast, Northeast, Northeast, Northeast, Northeast, N…
## $ partners_rc <fct> NA, 1, 1, NA, 1, 1, NA, 1, NA, 3, 1, NA, 1, NA, 0, 1, 0,…
## $ obama <dbl> 0, 1, 0, 0, 1, 1, NA, NA, NA, 0, NA, NA, 1, 1, 0, 1, 0, …
p <- ggplot(gss_sm, aes(x = age, y = childs))
# Number of children by age, faceted by sex vertically (rows)
# and by race horizontally (columns).
p +
  geom_point(alpha = 0.2) +
  geom_smooth() +
  facet_grid(sex ~ race)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 18 rows containing non-finite values (stat_smooth).
## Warning: Removed 18 rows containing missing values (geom_point).
geom_*()関数は,それぞれデフォルトのstat_*()関数と紐づけられている.
# Bar chart.
# The counts are computed inside geom_bar() by stat_count().
# fill is mapped because color would only change the bar outlines.
p <- ggplot(gss_sm, aes(x = bigregion, fill = bigregion))
p + geom_bar()
# When a chart of proportions is wanted.
# Written like this, however, every bar comes out as 1.
p <- ggplot(gss_sm, aes(x = bigregion, fill = bigregion))
p + geom_bar(aes(y = after_stat(prop)))
# Supply 1 as a dummy group.
# In that case the coloring by fill no longer takes effect
# (possibly because the group takes precedence?).
p <- ggplot(gss_sm, aes(x = bigregion, fill = bigregion))
p + geom_bar(aes(y = after_stat(prop), group = 1))
# How many respondents there are for each religion.
table(gss_sm$religion)
##
## Protestant Catholic Jewish None Other
## 1371 649 51 619 159
# Mapping color this way only colors the bar outlines.
p <- ggplot(gss_sm, aes(x = religion, color = religion))
p +
  geom_bar() +
  guides(fill = "none") # the legend does not go away: the mapped aesthetic is color, not fill
p <- ggplot(gss_sm, aes(x = religion, fill = religion))
p +
  geom_bar() +
  guides(fill = "none") # this removes the legend
# geom_bar(mapping = aes(fill = religion))
# would produce the same chart
# Number of respondents of each religion within each region (bigregion).
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion))
p + geom_bar()
# However, that chart is hard to compare (e.g. Catholic is hard to read).
# Turn it into a chart of proportions.
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion))
p + geom_bar(position = "fill") # this "fill" is a position value, not an aesthetic
# However, that chart does not let us judge relative sizes;
# comparing the share of each religion within a region is possible.
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion))
p + geom_bar(position = "dodge") # position = "dodge" places the bars side by side
# Next, use after_stat(prop) so that the bars show proportions.
# The group must then be set to the variable to group by (religion here).
p <- ggplot(gss_sm, aes(x = bigregion, fill = religion))
p + geom_bar(
  aes(y = after_stat(prop), group = religion),
  position = "dodge"
)
# In that chart the proportions do not sum to 1 within each region;
# instead each religion sums to 1 across the regions.
# It shows, for example, that about half of the respondents answering
# Jewish live in the Northeast.
# Still, this is not the chart we wanted in the first place.
# We want the categories side by side so that their heights can be compared.
# Use facet_wrap to split the chart by region.
p <- ggplot(gss_sm, aes(x = religion))
p +
  geom_bar(
    aes(y = after_stat(prop), group = bigregion),
    position = "dodge"
  ) +
  facet_wrap(~bigregion, ncol = 2)
頻度をプロットする場合,ggplotで全てを行おうとすると沼にハマりがち.dplyrパッケージを使った方が信頼性が高く,エラーの確認も容易な方法になる.それは第5章で述べる.
連続量をヒストグラムとして描く時は,binを指定する必要がある(デフォルトはbins = 30).binの取り方によって分布の見え方が変わるため,ヒストグラムのbinには注意が必要.
# Use the midwest dataset.
glimpse(midwest)
## Rows: 437
## Columns: 28
## $ PID <int> 561, 562, 563, 564, 565, 566, 567, 568, 569, 57…
## $ county <chr> "ADAMS", "ALEXANDER", "BOND", "BOONE", "BROWN",…
## $ state <chr> "IL", "IL", "IL", "IL", "IL", "IL", "IL", "IL",…
## $ area <dbl> 0.052, 0.014, 0.022, 0.017, 0.018, 0.050, 0.017…
## $ poptotal <int> 66090, 10626, 14991, 30806, 5836, 35688, 5322, …
## $ popdensity <dbl> 1270.9615, 759.0000, 681.4091, 1812.1176, 324.2…
## $ popwhite <int> 63917, 7054, 14477, 29344, 5264, 35157, 5298, 1…
## $ popblack <int> 1702, 3496, 429, 127, 547, 50, 1, 111, 16, 1655…
## $ popamerindian <int> 98, 19, 35, 46, 14, 65, 8, 30, 8, 331, 51, 26, …
## $ popasian <int> 249, 48, 16, 150, 5, 195, 15, 61, 23, 8033, 89,…
## $ popother <int> 124, 9, 34, 1139, 6, 221, 0, 84, 6, 1596, 20, 7…
## $ percwhite <dbl> 96.71206, 66.38434, 96.57128, 95.25417, 90.1987…
## $ percblack <dbl> 2.57527614, 32.90043290, 2.86171703, 0.41225735…
## $ percamerindan <dbl> 0.14828264, 0.17880670, 0.23347342, 0.14932156,…
## $ percasian <dbl> 0.37675897, 0.45172219, 0.10673071, 0.48691813,…
## $ percother <dbl> 0.18762294, 0.08469791, 0.22680275, 3.69733169,…
## $ popadults <int> 43298, 6724, 9669, 19272, 3979, 23444, 3583, 11…
## $ perchsd <dbl> 75.10740, 59.72635, 69.33499, 75.47219, 68.8615…
## $ percollege <dbl> 19.63139, 11.24331, 17.03382, 17.27895, 14.4760…
## $ percprof <dbl> 4.355859, 2.870315, 4.488572, 4.197800, 3.36768…
## $ poppovertyknown <int> 63628, 10529, 14235, 30337, 4815, 35107, 5241, …
## $ percpovertyknown <dbl> 96.27478, 99.08714, 94.95697, 98.47757, 82.5051…
## $ percbelowpoverty <dbl> 13.151443, 32.244278, 12.068844, 7.209019, 13.5…
## $ percchildbelowpovert <dbl> 18.011717, 45.826514, 14.036061, 11.179536, 13.…
## $ percadultpoverty <dbl> 11.009776, 27.385647, 10.852090, 5.536013, 11.1…
## $ percelderlypoverty <dbl> 12.443812, 25.228976, 12.697410, 6.217047, 19.2…
## $ inmetro <int> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,…
## $ category <chr> "AAR", "LHR", "AAR", "ALU", "AAR", "AAR", "LAR"…
# Histograms of area with two different ways of specifying the bins.
p <- ggplot(midwest, aes(x = area))
p + geom_histogram(bins = 10) # bins: number of bins
p + geom_histogram(binwidth = 0.01) # binwidth: width of each bin
# Several histograms can be drawn together in one plot.
oh_wi <- c("OH", "WI") # keep only the two states OH and WI
p <- ggplot(
  subset(midwest, subset = state %in% oh_wi),
  aes(x = percollege, fill = state)
)
p + geom_histogram(bins = 20, alpha = 0.4)
# Kernel density estimation is another option.
p <- ggplot(midwest, aes(x = area))
p + geom_density()
# Color by state.
# fill affects the body of the density curve; color affects its line.
p <- ggplot(
  midwest,
  aes(x = area, fill = state, color = state)
)
p + geom_density(alpha = 0.1)
# When the overlapping areas are hard to read,
# geom_line(stat = "density") draws the lines only.
p <- ggplot(midwest, aes(x = area, color = state))
p + geom_line(stat = "density")
# As with geom_bar(), relative values can also be requested from
# geom_histogram() and geom_density().
p <- ggplot(
  midwest,
  aes(x = area, color = state, fill = state)
)
p + geom_density(aes(y = after_stat(scaled)), alpha = 0.3)
# count gives the density multiplied by the number of data points.
p <- ggplot(
  midwest,
  aes(x = area, color = state, fill = state)
)
p + geom_density(aes(y = after_stat(count)), alpha = 0.3)
得られたデータがすでに要約されている場合など,変換を避けたい場合はstat = "identity"と記述する.
# titanic: data on the survivors of the Titanic.
titanic
## fate sex n percent
## 1 perished male 1364 62.0
## 2 perished female 126 5.7
## 3 survived male 367 16.7
## 4 survived female 344 15.6
# Survival percentages by sex in the titanic data.
p <- ggplot(titanic, aes(x = fate, y = percent, fill = sex))
# stat = "identity" is used when the variable should not be transformed.
p +
  geom_bar(stat = "identity", position = "dodge") +
  theme(legend.position = "top")
# geom_col can be used instead.
p + geom_col(position = "dodge")
# oecd_sum: information on average life expectancy at birth for the US and OECD countries.
# The other column is the average life expectancy excluding the United States.
# It is found in the socviz package.
glimpse(oecd_sum)
## Rows: 57
## Columns: 5
## Groups: year [57]
## $ year <int> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 19…
## $ other <dbl> 68.6, 69.2, 68.9, 69.1, 69.5, 69.6, 69.9, 70.1, 70.1, 70.1, 69…
## $ usa <dbl> 69.9, 70.4, 70.2, 70.0, 70.3, 70.3, 70.3, 70.7, 70.4, 70.6, 70…
## $ diff <dbl> 1.3, 1.2, 1.3, 0.9, 0.8, 0.7, 0.4, 0.6, 0.3, 0.5, 1.1, 0.8, 0.…
## $ hi_lo <chr> "Below", "Below", "Below", "Below", "Below", "Below", "Below",…
# Print oecd_sum to inspect its rows.
oecd_sum
## # A tibble: 57 x 5
## # Groups: year [57]
## year other usa diff hi_lo
## <int> <dbl> <dbl> <dbl> <chr>
## 1 1960 68.6 69.9 1.3 Below
## 2 1961 69.2 70.4 1.2 Below
## 3 1962 68.9 70.2 1.30 Below
## 4 1963 69.1 70 0.9 Below
## 5 1964 69.5 70.3 0.800 Below
## 6 1965 69.6 70.3 0.7 Below
## 7 1966 69.9 70.3 0.400 Below
## 8 1967 70.1 70.7 0.6 Below
## 9 1968 70.1 70.4 0.3 Below
## 10 1969 70.1 70.6 0.5 Below
## # … with 47 more rows
# Column chart of the diff column of oecd_sum by year,
# with the bars filled according to the hi_lo column.
p <- ggplot(data = oecd_sum,
            mapping = aes(x = year, y = diff, fill = hi_lo))
p + geom_col() +
  guides(fill = "none") + # hide the legend
  # Label text: spelling fixed ("Difference", "Christopher").
  labs(x = NULL, y = "Difference in years",
       title = "The US Life Expectancy Gap",
       subtitle = "Difference between US and OECD average life expectancy, 1960-2015",
       caption = "Data: OECD. After a chart by Christopher Ingraham, Washington Post, December 27th 2017"
  )
## Warning: Removed 1 rows containing missing values (position_stack).
# Try faceting the gapminder dataset by various variables.
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghan…
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia…
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997…
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, …
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134…
# List the distinct values of the country column.
unique(gapminder$country)
## [1] Afghanistan Albania Algeria
## [4] Angola Argentina Australia
## [7] Austria Bahrain Bangladesh
## [10] Belgium Benin Bolivia
## [13] Bosnia and Herzegovina Botswana Brazil
## [16] Bulgaria Burkina Faso Burundi
## [19] Cambodia Cameroon Canada
## [22] Central African Republic Chad Chile
## [25] China Colombia Comoros
## [28] Congo, Dem. Rep. Congo, Rep. Costa Rica
## [31] Cote d'Ivoire Croatia Cuba
## [34] Czech Republic Denmark Djibouti
## [37] Dominican Republic Ecuador Egypt
## [40] El Salvador Equatorial Guinea Eritrea
## [43] Ethiopia Finland France
## [46] Gabon Gambia Germany
## [49] Ghana Greece Guatemala
## [52] Guinea Guinea-Bissau Haiti
## [55] Honduras Hong Kong, China Hungary
## [58] Iceland India Indonesia
## [61] Iran Iraq Ireland
## [64] Israel Italy Jamaica
## [67] Japan Jordan Kenya
## [70] Korea, Dem. Rep. Korea, Rep. Kuwait
## [73] Lebanon Lesotho Liberia
## [76] Libya Madagascar Malawi
## [79] Malaysia Mali Mauritania
## [82] Mauritius Mexico Mongolia
## [85] Montenegro Morocco Mozambique
## [88] Myanmar Namibia Nepal
## [91] Netherlands New Zealand Nicaragua
## [94] Niger Nigeria Norway
## [97] Oman Pakistan Panama
## [100] Paraguay Peru Philippines
## [103] Poland Portugal Puerto Rico
## [106] Reunion Romania Rwanda
## [109] Sao Tome and Principe Saudi Arabia Senegal
## [112] Serbia Sierra Leone Singapore
## [115] Slovak Republic Slovenia Somalia
## [118] South Africa Spain Sri Lanka
## [121] Sudan Swaziland Sweden
## [124] Switzerland Syria Taiwan
## [127] Tanzania Thailand Togo
## [130] Trinidad and Tobago Tunisia Turkey
## [133] Uganda United Kingdom United States
## [136] Uruguay Venezuela Vietnam
## [139] West Bank and Gaza Yemen, Rep. Zambia
## [142] Zimbabwe
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
# List the distinct values of the year column.
unique(gapminder$year)
## [1] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
# Scatter plot of population against GDP per capita, one panel per year.
# fill is intentionally not mapped: geom_point()'s default shape has no
# fill, so a fill mapping would change nothing in the panels and would only
# add a legend that then has to be hidden with guides(fill = "none").
p <- ggplot(data = gapminder,
            mapping = aes(x = pop, y = gdpPercap))
p.out <- p + geom_point() +
  facet_wrap(~ year, ncol = 2)
# No width/height given, so ggsave() uses the current graphics device size.
ggsave(filename = "pop_gdp_facet_year.pdf", plot = p.out)
## Saving 7 x 5 in image
# Population (log10 x axis) against GDP per capita, colored by year,
# with one panel per country; saved as a tall JPEG.
p <- ggplot(gapminder, aes(x = pop, y = gdpPercap, color = year))
p.out <- p +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~country, ncol = 4)
ggsave(
  filename = "pop_gdp_year_facet_country.jpg",
  plot = p.out,
  width = 20,
  height = 40,
  units = "in"
)
# Examine the difference between facet_grid and facet_wrap.
# The gss_sm dataset is used.
# x is age, y is childs, faceted by sex and race.
glimpse(gss_sm)
## Rows: 2,867
## Columns: 32
## $ year <dbl> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 20…
## $ id <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ ballot <labelled> 1, 2, 3, 1, 3, 2, 1, 3, 1, 3, 2, 1, 2, 3, 2, 3, 3, …
## $ age <dbl> 47, 61, 72, 43, 55, 53, 50, 23, 45, 71, 33, 86, 32, 60, …
## $ childs <dbl> 3, 0, 2, 4, 2, 2, 2, 3, 3, 4, 5, 4, 3, 5, 7, 2, 6, 5, 0,…
## $ sibs <labelled> 2, 3, 3, 3, 2, 2, 2, 6, 5, 1, 4, 4, 3, 6, 0, 1, 3, …
## $ degree <fct> Bachelor, High School, Bachelor, High School, Graduate, …
## $ race <fct> White, White, White, White, White, White, White, Other, …
## $ sex <fct> Male, Male, Male, Female, Female, Female, Male, Female, …
## $ region <fct> New England, New England, New England, New England, New …
## $ income16 <fct> $170000 or over, $50000 to 59999, $75000 to $89999, $170…
## $ relig <fct> None, None, Catholic, Catholic, None, None, None, Cathol…
## $ marital <fct> Married, Never Married, Married, Married, Married, Marri…
## $ padeg <fct> Graduate, Lt High School, High School, NA, Bachelor, NA,…
## $ madeg <fct> High School, High School, Lt High School, High School, H…
## $ partyid <fct> "Independent", "Ind,near Dem", "Not Str Republican", "No…
## $ polviews <fct> Moderate, Liberal, Conservative, Moderate, Slightly Libe…
## $ happy <fct> Pretty Happy, Pretty Happy, Very Happy, Pretty Happy, Ve…
## $ partners <fct> NA, 1 Partner, 1 Partner, NA, 1 Partner, 1 Partner, NA, …
## $ grass <fct> NA, Legal, Not Legal, NA, Legal, Legal, NA, Not Legal, N…
## $ zodiac <fct> Aquarius, Scorpio, Pisces, Cancer, Scorpio, Scorpio, Cap…
## $ pres12 <labelled> 3, 1, 2, 2, 1, 1, NA, NA, NA, 2, NA, NA, 1, 1, 2, 1…
## $ wtssall <dbl> 0.9569935, 0.4784968, 0.9569935, 1.9139870, 1.4354903, 0…
## $ income_rc <fct> Gt $170000, Gt $50000, Gt $75000, Gt $170000, Gt $170000…
## $ agegrp <fct> Age 45-55, Age 55-65, Age 65+, Age 35-45, Age 45-55, Age…
## $ ageq <fct> Age 34-49, Age 49-62, Age 62+, Age 34-49, Age 49-62, Age…
## $ siblings <fct> 2, 3, 3, 3, 2, 2, 2, 6+, 5, 1, 4, 4, 3, 6+, 0, 1, 3, 6+,…
## $ kids <fct> 3, 0, 2, 4+, 2, 2, 2, 3, 3, 4+, 4+, 4+, 3, 4+, 4+, 2, 4+…
## $ religion <fct> None, None, Catholic, Catholic, None, None, None, Cathol…
## $ bigregion <fct> Northeast, Northeast, Northeast, Northeast, Northeast, N…
## $ partners_rc <fct> NA, 1, 1, NA, 1, 1, NA, 1, NA, 3, 1, NA, 1, NA, 0, 1, 0,…
## $ obama <dbl> 0, 1, 0, 0, 1, 1, NA, NA, NA, 0, NA, NA, 1, 1, 0, 1, 0, …
# List the column names of gss_sm.
colnames(gss_sm)
## [1] "year" "id" "ballot" "age" "childs"
## [6] "sibs" "degree" "race" "sex" "region"
## [11] "income16" "relig" "marital" "padeg" "madeg"
## [16] "partyid" "polviews" "happy" "partners" "grass"
## [21] "zodiac" "pres12" "wtssall" "income_rc" "agegrp"
## [26] "ageq" "siblings" "kids" "religion" "bigregion"
## [31] "partners_rc" "obama"
# Build the same scatter plot with three different faceting specifications.
p <- ggplot(gss_sm, aes(x = age, y = childs))
p1 <- p +
  geom_point() +
  facet_grid(sex ~ race) +
  labs(title = "facet_grid: sex ~ race")
p2 <- p +
  geom_point() +
  facet_grid(~ sex + race) +
  labs(title = "facet_grid: ~ sex + race")
p3 <- p +
  geom_point() +
  facet_wrap(~ sex + race) +
  labs(title = "facet_wrap: ~sex + race")
# Combine them into one figure so they are easy to compare.
gridExtra::grid.arrange(p1, p2, p3)
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
p.all <- ggpubr::ggarrange(p1, p2, p3) # ggpubr::ggarrange() is a way to put several plots into a single plot
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
## Warning: Removed 18 rows containing missing values (geom_point).
# Save the combined comparison figure as a 10 x 10 inch PDF.
ggsave(
  filename = "comparison_facet.pdf",
  plot = p.all,
  width = 10,
  height = 10,
  units = "in"
)
# Frequency polygon (the original note called it a "cumulative" line chart).
# It shows the number of observations as a continuous line instead of bars.
colnames(midwest)
## [1] "PID" "county" "state"
## [4] "area" "poptotal" "popdensity"
## [7] "popwhite" "popblack" "popamerindian"
## [10] "popasian" "popother" "percwhite"
## [13] "percblack" "percamerindan" "percasian"
## [16] "percother" "popadults" "perchsd"
## [19] "percollege" "percprof" "poppovertyknown"
## [22] "percpovertyknown" "percbelowpoverty" "percchildbelowpovert"
## [25] "percadultpoverty" "percelderlypoverty" "inmetro"
## [28] "category"
# Histogram and frequency polygon of area with the same bin width,
# arranged in one figure, displayed, and saved as a PNG.
p <- ggplot(midwest, aes(x = area))
p1 <- p + geom_histogram(binwidth = 0.01)
p2 <- p + geom_freqpoly(binwidth = 0.01)
p.all <- ggpubr::ggarrange(p1, p2)
p.all
ggsave(
  filename = "histgram_freqpoly.png",
  plot = p.all,
  width = 10,
  height = 10,
  units = "in"
)
# Density estimation.
p <- ggplot(
  midwest,
  aes(x = area, fill = state, color = state)
)
p + geom_density(alpha = 0.1)
# Two-dimensional density of percollege against percbelowpoverty,
# drawn three ways: points plus contours, contours only, filled contours.
p <- ggplot(midwest, aes(x = percollege, y = percbelowpoverty))
p +
  geom_point(alpha = 0.2) +
  geom_density2d()
p + geom_density2d()
p + geom_density_2d_filled()